This is a statistical analysis of the prevalence of TST reactivity among household contacts of index TB cases in the HomeACF Study, a cluster randomised trial done in two provinces of South Africa.
This documents reports revised analysis following reviewer’s comments received on 25th Februrary 2020.
Load all required packages for analysis.
Import data required for the analysis.
Now do some tidying up of data.
Here we do multiple imputation using a flexible additive regression model with bootstrapping.
mdata <- tstsa %>%
dplyr::select(record_id, contact_id, tstdiam_h,
sex_h, ageyears_h, hivfinal_h,timespent_h,
sharebedroom_h, site, ageyears_i, sex_i,
smoke_i, hiv_i, microconf_i, num_contacts,
smoke_h, totalrooms_h, windows_h) %>%
mutate(hiv_i = case_when(hiv_i=="c) HIV-unknown" ~ NA_character_,
TRUE ~ hiv_i)) %>%
mutate(hivfinal_h = case_when(hivfinal_h=="c) HIV unknown" ~ NA_character_,
TRUE ~ hivfinal_h))
imp1 <- aregImpute(formula = ~ sex_h + ageyears_h + hivfinal_h +timespent_h +
sharebedroom_h + site + ageyears_i + sex_i +
smoke_i + hiv_i + microconf_i + num_contacts + totalrooms_h + windows_h,
data = mdata, n.impute=5, burnin = 3, nk=3, type="pmm",
pmmtype = 1)## Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
##
## Multiple Imputation using Bootstrap and PMM
##
## aregImpute(formula = ~sex_h + ageyears_h + hivfinal_h + timespent_h +
## sharebedroom_h + site + ageyears_i + sex_i + smoke_i + hiv_i +
## microconf_i + num_contacts + totalrooms_h + windows_h, data = mdata,
## n.impute = 5, nk = 3, type = "pmm", pmmtype = 1, burnin = 3)
##
## n: 2985 p: 14 Imputations: 5 nk: 3
##
## Number of NAs:
## sex_h ageyears_h hivfinal_h timespent_h sharebedroom_h
## 0 0 151 3 0
## site ageyears_i sex_i smoke_i hiv_i
## 0 0 0 0 145
## microconf_i num_contacts totalrooms_h windows_h
## 0 0 203 9
##
## type d.f.
## sex_h c 1
## ageyears_h s 2
## hivfinal_h c 1
## timespent_h c 2
## sharebedroom_h c 1
## site c 1
## ageyears_i s 2
## sex_i c 1
## smoke_i c 2
## hiv_i c 1
## microconf_i c 1
## num_contacts s 2
## totalrooms_h s 2
## windows_h s 1
##
## Transformation of Target Variables Forced to be Linear
##
## R-squares for Predicting Non-Missing Values for Each Variable
## Using Last Imputations of Predictors
## hivfinal_h timespent_h hiv_i totalrooms_h windows_h
## 0.165 0.088 0.217 0.738 0.766
mdata_imp <- as.data.frame(impute.transcan(imp1, imputation=1, data=mdata, list.out=TRUE, pr=FALSE, check=FALSE))
mdata_imp %>% janitor::tabyl(hiv_i)## hiv_i n percent
## a) HIV-negative 1334 0.4469012
## b) HIV-positive 1651 0.5530988
## hivfinal_h n percent
## a) HIV negative 2604 0.8723618
## b) HIV positive 381 0.1276382
## timespent_h n percent
## a) every now and again 366 0.1226131
## b) part of the day 1402 0.4696817
## c) most of the day 1217 0.4077052
mdata_imp <- mdata %>%
dplyr::select(record_id, contact_id, tstdiam_h,smoke_h) %>%
bind_cols(mdata_imp)
mdata_imp <- mdata_imp %>%
ungroup() %>%
haven::zap_labels()
mdata_imp <- mdata_imp %>%
mutate(hivfinal_h = as.factor(as.vector(hivfinal_h)),
timespent_h = as.factor(as.vector(timespent_h)),
hiv_i = as.factor(as.vector(hiv_i)),
record_id = as.character(record_id),
contact_id = as.character(contact_id),
totalrooms_h = as.numeric(totalrooms_h))
glimpse(mdata_imp)## Observations: 2,985
## Variables: 18
## $ record_id <chr> "1-00-0101", "1-00-0101", "1-00-0101", "1-00-0101", "1…
## $ contact_id <chr> "1-00-0101-02", "1-00-0101-03", "1-00-0101-05", "1-00-…
## $ tstdiam_h <dbl> 0, 0, 0, 0, 0, NA, NA, 0, 5, 20, NA, NA, 7, 5, 0, 13, …
## $ smoke_h <chr> "a) never smoked", "c) previous smoker", "a) never smo…
## $ sex_h <fct> Female, Male, Male, Female, Male, Male, Female, Male, …
## $ ageyears_h <dbl> 35, 59, 13, 55, 34, 7, 37, 8, 41, 22, 28, 41, 14, 76, …
## $ hivfinal_h <fct> b) HIV positive, a) HIV negative, a) HIV negative, a) …
## $ timespent_h <fct> c) most of the day, b) part of the day, c) most of the…
## $ sharebedroom_h <fct> Yes, No, No, No, No, No, No, No, No, No, No, No, No, N…
## $ site <fct> Mangaung, Mangaung, Mangaung, Mangaung, Mangaung, Mang…
## $ ageyears_i <dbl> 15, 15, 15, 15, 15, 15, 49, 49, 49, 49, 49, 49, 49, 49…
## $ sex_i <fct> Female, Female, Female, Female, Female, Female, Male, …
## $ smoke_i <fct> a) never smoked, a) never smoked, a) never smoked, a) …
## $ hiv_i <fct> b) HIV-positive, b) HIV-positive, b) HIV-positive, b) …
## $ microconf_i <fct> b) Micro-confirmed TB, b) Micro-confirmed TB, b) Micro…
## $ num_contacts <int> 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 3, 3, 3, 2, …
## $ totalrooms_h <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, …
## $ windows_h <dbl> 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 10, 10, 10, …
Make a table of baseline characteristics by study site.
First individual-level characteristics.
## Observations: 2,985
## Variables: 54
## Groups: record_id [924]
## $ record_id <chr> "1-00-0101", "1-00-0101", "1-00-0101", "1-00-0101", "1…
## $ contact_id <chr> "1-00-0101-02", "1-00-0101-03", "1-00-0101-05", "1-00-…
## $ sex_h <chr> "Female", "Male", "Male", "Female", "Male", "Male", "F…
## $ relationship_h <chr> "Parent/Parent-in-law", "Grandparent", "Other", "Grand…
## $ employment_h <chr> "Not employed", "Not employed", "Not employed", "Not e…
## $ airspace_h <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"…
## $ timespent_h <chr> "c) most of the day", "b) part of the day", "c) most o…
## $ genhealth_h <chr> "a) No problems", "a) No problems", "a) No problems", …
## $ weight_h <dbl> 78, 78, 50, 58, 54, 31, 78, 21, 71, 64, 60, 68, 55, 82…
## $ height_h <dbl> 154, 175, 150, 148, 168, 121, 166, 132, 159, 154, 176,…
## $ smoke_h <chr> "a) never smoked", "c) previous smoker", "a) never smo…
## $ alcohol_h <chr> "No", "Yes", "No", "No", "Yes", "No", "No", "No", "No"…
## $ art_h <chr> "b) Taking ART", NA, NA, NA, NA, NA, "b) Taking ART", …
## $ cough_h <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", …
## $ haemopt_h <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", …
## $ wl_h <chr> "Yes", "No", "No", "No", "No", "No", "No", "No", "Yes"…
## $ ns_h <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", …
## $ fev_h <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", …
## $ diabetes_h <chr> "No", "No", "No", "Yes", "No", "No", "No", "No", "No",…
## $ h59 <date> 2017-05-12, 2017-05-12, 2017-05-12, 2017-05-12, 2017-…
## $ h60 <date> 2017-05-15, 2017-05-15, 2017-05-15, 2017-05-15, 2017-…
## $ tstdiam_h <dbl+lbl> 0, 0, 0, 0, 0, NA, NA, 0, 5, 20, NA, NA, 7, 5, 0, …
## $ hivfinal_h <chr> "b) HIV positive", "a) HIV negative", "a) HIV negative…
## $ ageyears_h <dbl> 35, 59, 13, 55, 34, 7, 37, 8, 41, 22, 28, 41, 14, 76, …
## $ sleepsamebed_h <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", …
## $ sharebedroom_h <chr> "Yes", "No", "No", "No", "No", "No", "No", "No", "No",…
## $ bmi_h <dbl> 32.9, 25.5, 22.2, 26.5, 19.1, 21.2, 28.3, 12.1, 28.1, …
## $ tstdone_h <dbl> 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, …
## $ tstread_h <dbl> 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, …
## $ site <chr> "Mangaung", "Mangaung", "Mangaung", "Mangaung", "Manga…
## $ dead_i <chr> "a) Not deceased", "a) Not deceased", "a) Not deceased…
## $ sex_i <chr> "Female", "Female", "Female", "Female", "Female", "Fem…
## $ xpert_i <chr> "b) Xpert positive", "b) Xpert positive", "b) Xpert po…
## $ culture_i <chr> "c) Culture not done", "c) Culture not done", "c) Cult…
## $ smear_i <chr> "c) Smear not done", "c) Smear not done", "c) Smear no…
## $ weight_i <dbl> 39, 39, 39, 39, 39, 39, 55, 55, 55, 55, 55, 55, 55, 55…
## $ height_i <dbl> 159, 159, 159, 159, 159, 159, 178, 178, 178, 178, 178,…
## $ smoke_i <chr> "a) never smoked", "a) never smoked", "a) never smoked…
## $ alcohol_i <chr> "a) Not alcohol drinker", "a) Not alcohol drinker", "a…
## $ hiv_i <chr> "b) HIV-positive", "b) HIV-positive", "b) HIV-positive…
## $ cd4_i <dbl> NA, NA, NA, NA, NA, NA, 346, 346, 346, 346, 346, 346, …
## $ art_i <chr> "b) Taking ART", "b) Taking ART", "b) Taking ART", "b)…
## $ coughdays_i <dbl> 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 0, …
## $ ageyears_i <dbl> 15, 15, 15, 15, 15, 15, 49, 49, 49, 49, 49, 49, 49, 49…
## $ microconf_i <chr> "b) Micro-confirmed TB", "b) Micro-confirmed TB", "b) …
## $ bmi_i <dbl> 15.4, 15.4, 15.4, 15.4, 15.4, 15.4, 17.4, 17.4, 17.4, …
## $ housetype_h <chr> "a) Brick/concerete house, apartment", "a) Brick/conce…
## $ totalrooms_h <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, …
## $ windows_h <dbl> 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 10, 10, 10, …
## $ numsmokers_h <dbl> 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, …
## $ pipedwater_h <chr> "c) Piped in yard", "c) Piped in yard", "c) Piped in y…
## $ toilet_h <chr> "a) Flush/chemical toilet", "a) Flush/chemical toilet"…
## $ death1year_h <chr> "a) No", "a) No", "a) No", "a) No", "a) No", "a) No", …
## $ num_contacts <int> 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 3, 3, 3, 2, …
table1a <- tableby(site ~ sex_i + ageyears_i + bmi_i +
dead_i + xpert_i + smear_i + culture_i +
microconf_i + smoke_i + hiv_i +
coughdays_i,
data = index2, test=TRUE,
numeric.stats=c("medianq1q3"), digits=3)
summary(table1a)##
##
## | | Capricorn (N=407) | Mangaung (N=517) | Total (N=924) | p value|
## |:-------------------------------------------|:-----------------------:|:-----------------------:|:-----------------------:|-------:|
## |**sex_i** | | | | 0.073|
## | Female | 174 (42.8%) | 191 (36.9%) | 365 (39.5%) | |
## | Male | 233 (57.2%) | 326 (63.1%) | 559 (60.5%) | |
## |**ageyears_i** | | | | 0.784|
## | Median (Q1, Q3) | 37.000 (29.000, 47.000) | 37.000 (28.000, 48.000) | 37.000 (28.000, 47.250) | |
## |**bmi_i** | | | | 0.288|
## | Median (Q1, Q3) | 19.100 (17.100, 22.100) | 18.700 (16.700, 21.600) | 18.800 (16.800, 21.825) | |
## |**dead_i** | | | | 0.407|
## | a) Not deceased | 393 (96.6%) | 504 (97.5%) | 897 (97.1%) | |
## | b) Deceased | 14 (3.4%) | 13 (2.5%) | 27 (2.9%) | |
## |**xpert_i** | | | | < 0.001|
## | a) Xpert negative | 6 (1.5%) | 1 (0.2%) | 7 (0.8%) | |
## | b) Xpert positive | 348 (85.5%) | 508 (98.3%) | 856 (92.6%) | |
## | c) Xpert not done | 53 (13.0%) | 8 (1.5%) | 61 (6.6%) | |
## |**smear_i** | | | | < 0.001|
## | a) Smear negative | 22 (5.4%) | 2 (0.4%) | 24 (2.6%) | |
## | b) Smear positive | 126 (31.0%) | 8 (1.5%) | 134 (14.5%) | |
## | c) Smear not done | 259 (63.6%) | 507 (98.1%) | 766 (82.9%) | |
## |**culture_i** | | | | < 0.001|
## | a) Culture negative | 1 (0.2%) | 2 (0.4%) | 3 (0.3%) | |
## | b) Culture positive | 32 (7.9%) | 0 (0.0%) | 32 (3.5%) | |
## | c) Culture not done | 374 (91.9%) | 515 (99.6%) | 889 (96.2%) | |
## |**microconf_i** | | | | 0.061|
## | a) Not micro-confirmed TB | 14 (3.4%) | 8 (1.5%) | 22 (2.4%) | |
## | b) Micro-confirmed TB | 393 (96.6%) | 509 (98.5%) | 902 (97.6%) | |
## |**smoke_i** | | | | < 0.001|
## | a) never smoked | 267 (65.6%) | 253 (48.9%) | 520 (56.3%) | |
## | b) current smoker | 49 (12.0%) | 127 (24.6%) | 176 (19.0%) | |
## | c) previous smoker | 91 (22.4%) | 137 (26.5%) | 228 (24.7%) | |
## |**hiv_i** | | | | 0.021|
## | a) HIV-negative | 178 (43.7%) | 207 (40.0%) | 385 (41.7%) | |
## | b) HIV-positive | 219 (53.8%) | 278 (53.8%) | 497 (53.8%) | |
## | c) HIV-unknown | 10 (2.5%) | 32 (6.2%) | 42 (4.5%) | |
## |**coughdays_i** | | | | 0.782|
## | Median (Q1, Q3) | 30.000 (0.000, 60.000) | 30.000 (9.500, 60.000) | 30.000 (7.000, 60.000) | |
Now the household contact characteristics
table1b <- tableby(site ~ sex_h + ageyears_h +
airspace_h + timespent_h + sleepsamebed_h + sharebedroom_h +
smoke_h +
hivfinal_h,
data=tstsa, test=TRUE,
numeric.stats=c("medianrange"), digits=1)
summary(table1b)##
##
## | | Capricorn (N=1298) | Mangaung (N=1687) | Total (N=2985) | p value|
## |:----------------------------------------|:------------------:|:-----------------:|:----------------:|-------:|
## |**sex_h** | | | | 0.964|
## | Female | 803 (61.9%) | 1045 (61.9%) | 1848 (61.9%) | |
## | Male | 495 (38.1%) | 642 (38.1%) | 1137 (38.1%) | |
## |**ageyears_h** | | | | 0.075|
## | Median (Range) | 17.0 (0.0, 98.0) | 16.0 (0.0, 95.0) | 17.0 (0.0, 98.0) | |
## |**airspace_h** | | | | 0.381|
## | N-Miss | 1 | 0 | 1 | |
## | No | 0 (0.0%) | 1 (0.1%) | 1 (0.0%) | |
## | Yes | 1297 (100.0%) | 1686 (99.9%) | 2983 (100.0%) | |
## |**timespent_h** | | | | < 0.001|
## | N-Miss | 2 | 1 | 3 | |
## | a) every now and again | 241 (18.6%) | 123 (7.3%) | 364 (12.2%) | |
## | b) part of the day | 628 (48.5%) | 773 (45.8%) | 1401 (47.0%) | |
## | c) most of the day | 427 (32.9%) | 790 (46.9%) | 1217 (40.8%) | |
## |**sleepsamebed_h** | | | | < 0.001|
## | No | 1263 (97.3%) | 1686 (99.9%) | 2949 (98.8%) | |
## | Yes | 35 (2.7%) | 1 (0.1%) | 36 (1.2%) | |
## |**sharebedroom_h** | | | | < 0.001|
## | No | 1184 (91.2%) | 1303 (77.2%) | 2487 (83.3%) | |
## | Yes | 114 (8.8%) | 384 (22.8%) | 498 (16.7%) | |
## |**smoke_h** | | | | 0.004|
## | a) never smoked | 1200 (92.4%) | 1498 (88.8%) | 2698 (90.4%) | |
## | b) current smoker | 83 (6.4%) | 159 (9.4%) | 242 (8.1%) | |
## | c) previous smoker | 15 (1.2%) | 30 (1.8%) | 45 (1.5%) | |
## |**hivfinal_h** | | | | < 0.001|
## | N-Miss | 3 | 0 | 3 | |
## | a) HIV negative | 1136 (87.7%) | 1349 (80.0%) | 2485 (83.3%) | |
## | b) HIV positive | 126 (9.7%) | 223 (13.2%) | 349 (11.7%) | |
## | c) HIV unknown | 33 (2.5%) | 115 (6.8%) | 148 (5.0%) | |
Now the dwelling characteristics
## Observations: 2,985
## Variables: 54
## Groups: record_id [924]
## $ record_id <chr> "1-00-0101", "1-00-0101", "1-00-0101", "1-00-0101", "1…
## $ contact_id <chr> "1-00-0101-02", "1-00-0101-03", "1-00-0101-05", "1-00-…
## $ sex_h <chr> "Female", "Male", "Male", "Female", "Male", "Male", "F…
## $ relationship_h <chr> "Parent/Parent-in-law", "Grandparent", "Other", "Grand…
## $ employment_h <chr> "Not employed", "Not employed", "Not employed", "Not e…
## $ airspace_h <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"…
## $ timespent_h <chr> "c) most of the day", "b) part of the day", "c) most o…
## $ genhealth_h <chr> "a) No problems", "a) No problems", "a) No problems", …
## $ weight_h <dbl> 78, 78, 50, 58, 54, 31, 78, 21, 71, 64, 60, 68, 55, 82…
## $ height_h <dbl> 154, 175, 150, 148, 168, 121, 166, 132, 159, 154, 176,…
## $ smoke_h <chr> "a) never smoked", "c) previous smoker", "a) never smo…
## $ alcohol_h <chr> "No", "Yes", "No", "No", "Yes", "No", "No", "No", "No"…
## $ art_h <chr> "b) Taking ART", NA, NA, NA, NA, NA, "b) Taking ART", …
## $ cough_h <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", …
## $ haemopt_h <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", …
## $ wl_h <chr> "Yes", "No", "No", "No", "No", "No", "No", "No", "Yes"…
## $ ns_h <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", …
## $ fev_h <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", …
## $ diabetes_h <chr> "No", "No", "No", "Yes", "No", "No", "No", "No", "No",…
## $ h59 <date> 2017-05-12, 2017-05-12, 2017-05-12, 2017-05-12, 2017-…
## $ h60 <date> 2017-05-15, 2017-05-15, 2017-05-15, 2017-05-15, 2017-…
## $ tstdiam_h <dbl+lbl> 0, 0, 0, 0, 0, NA, NA, 0, 5, 20, NA, NA, 7, 5, 0, …
## $ hivfinal_h <chr> "b) HIV positive", "a) HIV negative", "a) HIV negative…
## $ ageyears_h <dbl> 35, 59, 13, 55, 34, 7, 37, 8, 41, 22, 28, 41, 14, 76, …
## $ sleepsamebed_h <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", …
## $ sharebedroom_h <chr> "Yes", "No", "No", "No", "No", "No", "No", "No", "No",…
## $ bmi_h <dbl> 32.9, 25.5, 22.2, 26.5, 19.1, 21.2, 28.3, 12.1, 28.1, …
## $ tstdone_h <dbl> 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, …
## $ tstread_h <dbl> 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, …
## $ site <chr> "Mangaung", "Mangaung", "Mangaung", "Mangaung", "Manga…
## $ dead_i <chr> "a) Not deceased", "a) Not deceased", "a) Not deceased…
## $ sex_i <chr> "Female", "Female", "Female", "Female", "Female", "Fem…
## $ xpert_i <chr> "b) Xpert positive", "b) Xpert positive", "b) Xpert po…
## $ culture_i <chr> "c) Culture not done", "c) Culture not done", "c) Cult…
## $ smear_i <chr> "c) Smear not done", "c) Smear not done", "c) Smear no…
## $ weight_i <dbl> 39, 39, 39, 39, 39, 39, 55, 55, 55, 55, 55, 55, 55, 55…
## $ height_i <dbl> 159, 159, 159, 159, 159, 159, 178, 178, 178, 178, 178,…
## $ smoke_i <chr> "a) never smoked", "a) never smoked", "a) never smoked…
## $ alcohol_i <chr> "a) Not alcohol drinker", "a) Not alcohol drinker", "a…
## $ hiv_i <chr> "b) HIV-positive", "b) HIV-positive", "b) HIV-positive…
## $ cd4_i <dbl> NA, NA, NA, NA, NA, NA, 346, 346, 346, 346, 346, 346, …
## $ art_i <chr> "b) Taking ART", "b) Taking ART", "b) Taking ART", "b)…
## $ coughdays_i <dbl> 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 0, …
## $ ageyears_i <dbl> 15, 15, 15, 15, 15, 15, 49, 49, 49, 49, 49, 49, 49, 49…
## $ microconf_i <chr> "b) Micro-confirmed TB", "b) Micro-confirmed TB", "b) …
## $ bmi_i <dbl> 15.4, 15.4, 15.4, 15.4, 15.4, 15.4, 17.4, 17.4, 17.4, …
## $ housetype_h <chr> "a) Brick/concerete house, apartment", "a) Brick/conce…
## $ totalrooms_h <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, …
## $ windows_h <dbl> 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 10, 10, 10, …
## $ numsmokers_h <dbl> 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, …
## $ pipedwater_h <chr> "c) Piped in yard", "c) Piped in yard", "c) Piped in y…
## $ toilet_h <chr> "a) Flush/chemical toilet", "a) Flush/chemical toilet"…
## $ death1year_h <chr> "a) No", "a) No", "a) No", "a) No", "a) No", "a) No", …
## $ num_contacts <int> 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 3, 3, 3, 2, …
dwellings <- tstsa %>%
select(record_id, site, num_contacts, totalrooms_h, windows_h,
numsmokers_h) %>%
distinct()
table1c <- tableby(site ~ num_contacts + totalrooms_h + windows_h + numsmokers_h,
data=dwellings, test=TRUE,
numeric.stats=c("medianrange"), digits=1)
summary(table1c)##
##
## | | Capricorn (N=407) | Mangaung (N=517) | Total (N=924) | p value|
## |:---------------------------------------------------------------------|:-----------------:|:----------------:|:---------------:|-------:|
## |**num_contacts** | | | | 0.602|
## | Median (Range) | 3.0 (1.0, 14.0) | 3.0 (1.0, 14.0) | 3.0 (1.0, 14.0) | |
## |**G09. Total rooms in household** | | | | < 0.001|
## | Median (Range) | 6.0 (1.0, 22.0) | 4.0 (1.0, 10.0) | 5.0 (1.0, 22.0) | |
## |**G10. How many windows are there in the household?** | | | | < 0.001|
## | Median (Range) | 8.0 (0.0, 27.0) | 4.0 (0.0, 15.0) | 5.0 (0.0, 27.0) | |
## |**G15. How many people smoke tobacco inside the household? __ __ __** | | | | 0.068|
## | Median (Range) | 0.0 (0.0, 6.0) | 0.0 (0.0, 4.0) | 0.0 (0.0, 6.0) | |
Number of contacts per index case
tstsa %>%
group_by(record_id, site) %>%
count() %>%
group_by(n, site) %>%
summarise(contacts = sum(n)) %>%
ggplot() +
geom_bar(aes(x=n, y=contacts, fill=site), stat="identity") +
facet_wrap(~site) +
theme_bw(base_family = "Oswald") +
theme(legend.position = "none") +
labs(x="Number of household contacts per index case",
y = "N")Graph by age-group of proportion with TST >=5 mm, stratified by key characteristics
agegps <- mdata_imp %>%
mutate(agegroup_h = case_when(ageyears_h <5 ~ "0-4",
ageyears_h >=5 & ageyears_h <10 ~ "05-09",
ageyears_h >=10 & ageyears_h <15 ~ "10-14",
ageyears_h >=15 & ageyears_h <25 ~ "15-24",
ageyears_h >=25 & ageyears_h <35 ~ "25-34",
ageyears_h >=35 & ageyears_h <45 ~ "35-44",
ageyears_h >=45 & ageyears_h <55 ~ "45-54",
ageyears_h >=55 ~ "55+"),
ordered=TRUE) %>%
mutate(tst_5mm = case_when(tstdiam_h<5 ~ 0,
tstdiam_h>=5 ~ 1)) %>%
mutate(tst_10mm = case_when(tstdiam_h<10 ~ 0,
tstdiam_h>=10 ~ 1))
agegps %>%
janitor::tabyl(agegroup_h, site) %>%
adorn_totals("col") %>%
adorn_percentages("col") %>%
adorn_pct_formatting() %>%
adorn_ns()## agegroup_h Capricorn Mangaung Total
## 0-4 15.1% (196) 14.8% (249) 14.9% (445)
## 05-09 15.9% (206) 16.2% (274) 16.1% (480)
## 10-14 13.4% (174) 15.8% (267) 14.8% (441)
## 15-24 16.9% (219) 15.8% (267) 16.3% (486)
## 25-34 10.7% (139) 10.4% (176) 10.6% (315)
## 35-44 6.0% (78) 7.9% (133) 7.1% (211)
## 45-54 6.5% (84) 6.3% (106) 6.4% (190)
## 55+ 15.6% (202) 12.7% (215) 14.0% (417)
| tst_5mm | n | percent | valid_percent |
|---|---|---|---|
| 0 | 2267 | 0.75946399 | 0.8319266 |
| 1 | 458 | 0.15343384 | 0.1680734 |
| NA | 260 | 0.08710218 | NA |
| tst_10mm | n | percent | valid_percent |
|---|---|---|---|
| 0 | 2366 | 0.79262982 | 0.8682569 |
| 1 | 359 | 0.12026801 | 0.1317431 |
| NA | 260 | 0.08710218 | NA |
Look at the bins of TST diameter
tstsa %>%
group_by(tstdiam_h, site) %>%
summarise(n= n()) %>%
mutate(prop = n/sum(n)) %>%
filter(!is.na(tstdiam_h)) %>%
ggplot() +
geom_bar(aes(x=tstdiam_h, y=n), stat = "identity") +
theme_light() +
facet_grid(site~.) +
scale_y_continuous() +
labs(title="Tuberculin skin test reactivity in household contacts",
x = "Diameter (mm)",
y = "Percent")## Don't know how to automatically pick scale for object of type haven_labelled. Defaulting to continuous.
Proportions of household contacts with TST induration >=5mm
#Get the prevalence with TST induration >=5mm overall
g0 <- agegps %>%
group_by(agegroup_h, tst_5mm) %>%
summarise(n=n()) %>%
mutate(sum = sum(n)) %>%
filter(tst_5mm==1) %>%
rowwise %>%
mutate(xx = list(broom::tidy(binom.test(n, sum, conf.level=0.95)))) %>%
tidyr::unnest(xx)
g0 ## # A tibble: 8 x 12
## agegroup_h tst_5mm n sum estimate statistic p.value parameter conf.low
## <chr> <dbl> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0-4 1 58 445 0.130 58 9.10e-61 445 0.100
## 2 05-09 1 62 480 0.129 62 6.70e-66 480 0.100
## 3 10-14 1 57 441 0.129 57 1.25e-60 441 0.0994
## 4 15-24 1 70 486 0.144 70 6.27e-61 486 0.114
## 5 25-34 1 57 315 0.181 57 1.08e-31 315 0.140
## 6 35-44 1 41 211 0.194 41 7.33e-20 211 0.143
## 7 45-54 1 36 190 0.189 36 1.39e-18 190 0.136
## 8 55+ 1 77 417 0.185 77 1.65e-40 417 0.149
## # … with 3 more variables: conf.high <dbl>, method <chr>, alternative <chr>
#plot
g0 %>%
ggplot() +
geom_bar(aes(x=agegroup_h, y=estimate), fill = "firebrick", stat = "identity") +
geom_errorbar(aes(x=agegroup_h, ymin=conf.low, ymax = conf.high), width = 0.2) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme_bw(base_family="Oswald") +
labs(y="",
x="") +
theme(legend.position = "none") +
theme(axis.text.x=element_text(angle=45, hjust=1))#Get the propotion with TST induraction >=5mm by site
g0_b <- agegps %>%
group_by(agegroup_h, tst_5mm, site) %>%
summarise(n=n()) %>%
mutate(sum = sum(n)) %>%
filter(tst_5mm==1) %>%
rowwise %>%
mutate(xx = list(broom::tidy(binom.test(n, sum, conf.level=0.95)))) %>%
tidyr::unnest(xx)
g0_b## # A tibble: 16 x 13
## agegroup_h tst_5mm site n sum estimate statistic p.value parameter
## <chr> <dbl> <fct> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0-4 1 Capr… 13 58 0.224 13 3.01e-5 58
## 2 0-4 1 Mang… 45 58 0.776 45 3.01e-5 58
## 3 05-09 1 Capr… 14 62 0.226 14 1.74e-5 62
## 4 05-09 1 Mang… 48 62 0.774 48 1.74e-5 62
## 5 10-14 1 Capr… 19 57 0.333 19 1.63e-2 57
## 6 10-14 1 Mang… 38 57 0.667 38 1.63e-2 57
## 7 15-24 1 Capr… 18 70 0.257 18 5.85e-5 70
## 8 15-24 1 Mang… 52 70 0.743 52 5.85e-5 70
## 9 25-34 1 Capr… 15 57 0.263 15 4.60e-4 57
## 10 25-34 1 Mang… 42 57 0.737 42 4.60e-4 57
## 11 35-44 1 Capr… 15 41 0.366 15 1.17e-1 41
## 12 35-44 1 Mang… 26 41 0.634 26 1.17e-1 41
## 13 45-54 1 Capr… 15 36 0.417 15 4.05e-1 36
## 14 45-54 1 Mang… 21 36 0.583 21 4.05e-1 36
## 15 55+ 1 Capr… 24 77 0.312 24 1.26e-3 77
## 16 55+ 1 Mang… 53 77 0.688 53 1.26e-3 77
## # … with 4 more variables: conf.low <dbl>, conf.high <dbl>, method <chr>,
## # alternative <chr>
#plot the proportion with TST induration >=5mm by site
g0_b %>%
ggplot() +
geom_bar(aes(x=agegroup_h, y=estimate, fill = site), stat = "identity") +
geom_errorbar(aes(x=agegroup_h, ymin=conf.low, ymax = conf.high), width = 0.2) +
facet_wrap(site~.) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme_bw(base_family="Oswald") +
labs(y="",
x="") +
theme(legend.position = "none") +
theme(axis.text.x=element_text(angle=45, hjust=1))Graph with two cut-offs for TST diameter
agegps <- mdata_imp %>%
mutate(agegroup_h = case_when(ageyears_h <5 ~ "0-4",
ageyears_h >=5 & ageyears_h <10 ~ "05-09",
ageyears_h >=10 & ageyears_h <15 ~ "10-14",
ageyears_h >=15 & ageyears_h <25 ~ "15-24",
ageyears_h >=25 & ageyears_h <35 ~ "25-34",
ageyears_h >=35 & ageyears_h <45 ~ "35-44",
ageyears_h >=45 & ageyears_h <55 ~ "45-54",
ageyears_h >=55 ~ "55+"),
ordered=TRUE) %>%
mutate(tst_5mm = case_when(tstdiam_h<5 ~ 0,
tstdiam_h>=5 ~ 1)) %>%
mutate(tst_10mm = case_when(tstdiam_h<10 ~ 0,
tstdiam_h>=10 ~ 1)) %>%
mutate(hiv_i = case_when(hiv_i== "a) HIV-negative" ~ "Index HIV -ve",
hiv_i== "b) HIV-positive" ~ "Index HIV +ve")) %>%
mutate(hivfinal_h = case_when(hivfinal_h == "a) HIV negative" ~ "Contact HIV -ve",
hivfinal_h == "b) HIV positive" ~ "Contact HIV +ve")) %>%
mutate(timespent_h = case_when(timespent_h == "a) every now and again" ~ "Contact rarely",
timespent_h == "b) part of the day" ~ "Contact part of day",
timespent_h == "c) most of the day" ~ "Contact most of day")) %>%
mutate(sex_i = case_when(sex_i == "Male" ~ "Index male",
sex_i == "Female" ~ "Index female")) %>%
mutate(sex_h = case_when(sex_h == "Male" ~ "Contact male",
sex_h == "Female" ~ "Contact female"))
# Make some tables of the outcome variables
agegps %>%
janitor::tabyl(tst_5mm) %>%
gt()| tst_5mm | n | percent | valid_percent |
|---|---|---|---|
| 0 | 2267 | 0.75946399 | 0.8319266 |
| 1 | 458 | 0.15343384 | 0.1680734 |
| NA | 260 | 0.08710218 | NA |
| tst_10mm | n | percent | valid_percent |
|---|---|---|---|
| 0 | 2366 | 0.79262982 | 0.8682569 |
| 1 | 359 | 0.12026801 | 0.1317431 |
| NA | 260 | 0.08710218 | NA |
#Now cross tab between agegroup and sex
agegps %>%
janitor::tabyl(tst_5mm, agegroup_h, site, show_na = FALSE) %>%
janitor::adorn_percentages("col") ## $Capricorn
## tst_5mm 0-4 05-09 10-14 15-24 25-34 35-44
## 0 0.92397661 0.92893401 0.8908046 0.91705069 0.887218 0.7945205
## 1 0.07602339 0.07106599 0.1091954 0.08294931 0.112782 0.2054795
## 45-54 55+
## 0.8148148 0.8787879
## 0.1851852 0.1212121
##
## $Mangaung
## tst_5mm 0-4 05-09 10-14 15-24 25-34 35-44 45-54
## 0 0.7804878 0.8181818 0.8473896 0.776824 0.7042254 0.74 0.7789474
## 1 0.2195122 0.1818182 0.1526104 0.223176 0.2957746 0.26 0.2210526
## 55+
## 0.7253886
## 0.2746114
agegps %>%
filter(!is.na(tstdiam_h)) %>%
janitor::tabyl(tst_10mm, agegroup_h, site, show_na = FALSE) %>%
janitor::adorn_percentages("col")## $Capricorn
## tst_10mm 0-4 05-09 10-14 15-24 25-34 35-44
## 0 0.97660819 0.96446701 0.93678161 0.94470046 0.8947368 0.890411
## 1 0.02339181 0.03553299 0.06321839 0.05529954 0.1052632 0.109589
## 45-54 55+
## 0.8395062 0.93434343
## 0.1604938 0.06565657
##
## $Mangaung
## tst_10mm 0-4 05-09 10-14 15-24 25-34 35-44 45-54
## 0 0.795122 0.8484848 0.8835341 0.806867 0.7464789 0.77 0.8105263
## 1 0.204878 0.1515152 0.1164659 0.193133 0.2535211 0.23 0.1894737
## 55+
## 0.7720207
## 0.2279793
#write a function to automate these
gdata <- function(data, groupvar, depvar) {
data %>%
ungroup() %>%
select(agegroup_h, {{depvar}}, .data[[groupvar]]) %>%
filter(!is.na({{depvar}})) %>%
group_by(agegroup_h, .data[[groupvar]], {{depvar}}) %>%
summarise(n=n()) %>%
mutate(sum = sum(n)) %>%
filter({{depvar}}==1) %>%
rowwise %>%
mutate(xx = list(broom::tidy(binom.test(n, sum, conf.level=0.95)))) %>%
tidyr::unnest(xx) %>%
select(agegroup_h, var = .data[[groupvar]], n, sum, estimate, conf.low, conf.high)
}
#select key variables for exploratory plotting
plot_vars <- agegps %>%
ungroup() %>%
select(site, sex_i, sex_h, hiv_i, hivfinal_h, timespent_h) %>%
names()
five_mm <- map(plot_vars, ~gdata(data = agegps, groupvar = .x, depvar = tst_5mm))
five_mm <- map(five_mm, ~mutate(., tst=">=5mm"))
ten_mm <- map(plot_vars, ~gdata(data = agegps, groupvar = .x, depvar = tst_10mm))
ten_mm <- map(ten_mm, ~mutate(., tst=">=10mm"))
gdata_out <- bind_rows(five_mm, ten_mm)## Warning in bind_rows_(x, .id): binding factor and character vector, coercing
## into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector, coercing
## into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector, coercing
## into character vector
gdata_out <- gdata_out %>%
mutate(var = fct_relevel(var,
"Mangaung", "Capricorn", "Index male", "Index female",
"Contact male", "Contact female",
"Index HIV +ve", "Index HIV -ve",
"Contact HIV +ve", "Contact HIV -ve",
"Contact rarely", "Contact part of day", "Contact most of day"))
ggplot(data = gdata_out) +
geom_errorbar(aes(x=agegroup_h, ymin=conf.low, ymax = conf.high), width = 0.2) +
geom_point(aes(x=agegroup_h, y=estimate, colour=tst)) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1),
limits = c(0,0.80),
breaks = c(0,0.15,0.30,0.45,0.60,0.75)) +
facet_grid(fct_rev(tst) ~var) +
theme_bw(base_family="Oswald") +
labs(x="Age group of contact (years)",
y="Percentage with tuberculin skin test induration (95% CI)") +
theme(legend.position = "none") +
theme(axis.text.x=element_text(angle=90, hjust=1))A graph of TST positivity by age of household contact
gdata_out2 <- gdata_out %>%
filter(var %in% c("Capricorn", "Mangaung")) %>%
mutate(var = factor(var, levels=c("Capricorn", "Mangaung"), labels=c("Capricorn", "Mangaung")),
tst = factor(tst, levels=c(">=5mm", ">=10mm"), labels=c(">=5mm", ">=10mm")))
gdata_out2 %>%
ggplot() +
geom_line(aes(y=estimate, x=agegroup_h, group=1)) +
geom_point(aes(y=estimate, x=agegroup_h, group=1)) +
geom_line(aes(y=conf.low, x=agegroup_h), group=1, linetype=4) +
geom_line(aes(y=conf.high, x=agegroup_h), group=1, linetype=4) +
facet_grid(tst~ var) +
theme_bw(base_family = "Open Sans Condensed Light") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(x="Age group of household contact (years)",
y="Percentage with TST induration (95% confidence interval)")Supplemental table 1: Compare characteristics of household contacts who did and did not receive TST.
s1 <- tstsa %>%
mutate(missing_tst = case_when(is.na(tstdiam_h) ~ "TST not done",
TRUE ~ "TST done"))
table_s1b <- tableby(missing_tst ~ site + sex_h + ageyears_h + bmi_h + relationship_h +
employment_h +
airspace_h + timespent_h + sleepsamebed_h + sharebedroom_h +
smoke_h + alcohol_h +
hivfinal_h + art_h + diabetes_h,
data=s1, test = FALSE,
numeric.stats=c("medianq1q3"),cat.stats = c("countrowpct"), digits=1)
summary(table_s1b)##
##
## | | TST done (N=2725) | TST not done (N=260) | Total (N=2985) |
## |:------------------------------------------------------|:-----------------:|:--------------------:|:-----------------:|
## |**site** | | | |
## | Capricorn | 1244 (95.8%) | 54 (4.2%) | 1298 (100.0%) |
## | Mangaung | 1481 (87.8%) | 206 (12.2%) | 1687 (100.0%) |
## |**sex_h** | | | |
## | Female | 1694 (91.7%) | 154 (8.3%) | 1848 (100.0%) |
## | Male | 1031 (90.7%) | 106 (9.3%) | 1137 (100.0%) |
## |**ageyears_h** | | | |
## | Median (Q1, Q3) | 16.0 (8.0, 37.0) | 21.0 (4.0, 37.2) | 17.0 (8.0, 37.0) |
## |**bmi_h** | | | |
## | Median (Q1, Q3) | 20.2 (16.6, 25.7) | 19.8 (16.4, 26.4) | 20.1 (16.6, 25.8) |
## |**relationship_h** | | | |
## | Brother/sister | 433 (91.9%) | 38 (8.1%) | 471 (100.0%) |
## | Child | 671 (91.5%) | 62 (8.5%) | 733 (100.0%) |
## | Grandchild | 273 (89.8%) | 31 (10.2%) | 304 (100.0%) |
## | Grandparent | 73 (96.1%) | 3 (3.9%) | 76 (100.0%) |
## | Other | 743 (93.0%) | 56 (7.0%) | 799 (100.0%) |
## | Parent/Parent-in-law | 318 (92.2%) | 27 (7.8%) | 345 (100.0%) |
## | Spouse | 213 (83.2%) | 43 (16.8%) | 256 (100.0%) |
## |**employment_h** | | | |
## | Agriculture | 5 (71.4%) | 2 (28.6%) | 7 (100.0%) |
## | Construction/Manufacturing/Transport | 38 (86.4%) | 6 (13.6%) | 44 (100.0%) |
## | Hospitality | 21 (91.3%) | 2 (8.7%) | 23 (100.0%) |
## | Not employed | 2564 (91.7%) | 233 (8.3%) | 2797 (100.0%) |
## | Other | 97 (85.1%) | 17 (14.9%) | 114 (100.0%) |
## |**airspace_h** | | | |
## | No | 0 (0.0%) | 1 (100.0%) | 1 (100.0%) |
## | Yes | 2724 (91.3%) | 259 (8.7%) | 2983 (100.0%) |
## |**timespent_h** | | | |
## | a) every now and again | 345 (94.8%) | 19 (5.2%) | 364 (100.0%) |
## | b) part of the day | 1310 (93.5%) | 91 (6.5%) | 1401 (100.0%) |
## | c) most of the day | 1068 (87.8%) | 149 (12.2%) | 1217 (100.0%) |
## |**sleepsamebed_h** | | | |
## | No | 2689 (91.2%) | 260 (8.8%) | 2949 (100.0%) |
## | Yes | 36 (100.0%) | 0 (0.0%) | 36 (100.0%) |
## |**sharebedroom_h** | | | |
## | No | 2296 (92.3%) | 191 (7.7%) | 2487 (100.0%) |
## | Yes | 429 (86.1%) | 69 (13.9%) | 498 (100.0%) |
## |**smoke_h** | | | |
## | a) never smoked | 2465 (91.4%) | 233 (8.6%) | 2698 (100.0%) |
## | b) current smoker | 216 (89.3%) | 26 (10.7%) | 242 (100.0%) |
## | c) previous smoker | 44 (97.8%) | 1 (2.2%) | 45 (100.0%) |
## |**alcohol_h** | | | |
## | No | 2406 (91.3%) | 229 (8.7%) | 2635 (100.0%) |
## | Yes | 319 (91.1%) | 31 (8.9%) | 350 (100.0%) |
## |**hivfinal_h** | | | |
## | a) HIV negative | 2335 (94.0%) | 150 (6.0%) | 2485 (100.0%) |
## | b) HIV positive | 293 (84.0%) | 56 (16.0%) | 349 (100.0%) |
## | c) HIV unknown | 94 (63.5%) | 54 (36.5%) | 148 (100.0%) |
## |**art_h** | | | |
## | a) Not taking ART | 35 (81.4%) | 8 (18.6%) | 43 (100.0%) |
## | b) Taking ART | 207 (73.4%) | 75 (26.6%) | 282 (100.0%) |
## |**diabetes_h** | | | |
## | No | 2681 (91.3%) | 255 (8.7%) | 2936 (100.0%) |
## | Yes | 43 (93.5%) | 3 (6.5%) | 46 (100.0%) |
DAG was drawn at dagitty.net
Minimal sufficient adjustment sets for estimating the direct effect of Index case HIV status on Latent TB infection: - Index case age, - Province (site)
Fit a data-generating model including priors only to check priors are sensible.
prior <- c(
prior(normal(0, 2), class = Intercept),
prior(normal(0, 2), class = "b"),
prior(cauchy(0,1), sd)
)
#fit model
prior_fit <- brm(tst_5mm ~
#Main exposure
hiv_i +
#Index case characteristics
ageyears_i +
#Household characteristics
site +
#random intercepts for househod
(1 | record_id),
data=agegps,
family = bernoulli,
prior = prior,
seed = 13,
chains = 1,
sample_prior = "only",
control=list(adapt_delta=0.99))## Warning: Rows containing NAs were excluded from the model.
## Compiling the C++ model
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -D_REENTRANT -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -include stan/math/prim/mat/fun/Eigen.hpp -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -I/usr/local/include -fPIC -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:16: error: expected ';' after top level declarator
## namespace Eigen {
## ^
## ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
## ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
## Start sampling
##
## SAMPLING FOR MODEL 'f5ff6fdc8040aa30a1d89deb8cfd80b3' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.000345 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 3.45 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 9.5119 seconds (Warm-up)
## Chain 1: 9.34062 seconds (Sampling)
## Chain 1: 18.8525 seconds (Total)
## Chain 1:
## Family: bernoulli
## Links: mu = logit
## Formula: tst_5mm ~ hiv_i + ageyears_i + site + (1 | record_id)
## Data: agegps (Number of observations: 2725)
## Samples: 1 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 1000
##
## Group-Level Effects:
## ~record_id (Number of levels: 877)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 4.36 17.26 0.03 24.24 1.00 1899 559
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -2.10 83.54 -168.99 171.08 1.00 3000 473
## hiv_iIndexHIVPve -0.02 2.13 -4.08 3.93 1.00 3000 710
## ageyears_i 0.06 2.20 -4.51 4.52 1.00 3000 441
## siteMangaung -0.01 1.96 -3.98 3.64 1.00 2567 523
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
prior_fe <- fixef(prior_fit, summary = TRUE, robust = FALSE, probs = c(0.025, 0.975)) %>%
as.data.frame() %>%
rownames_to_column() %>%
filter(rowname != "Intercept") %>%
select(rowname, Q2.5, Q97.5) %>%
mutate_at(vars(Q2.5, Q97.5), exp)
prior_fe %>%
gt()## Warning: `prepend()` is deprecated as of rlang 0.4.0.
##
## Vector tools are now out of scope for rlang to make it a more
## focused package.
## This warning is displayed once per session.
| Q2.5 | Q97.5 | |
|---|---|---|
| hiv_iIndexHIVPve | 0.01685077 | 51.05898 |
| ageyears_i | 0.01104263 | 91.68674 |
| siteMangaung | 0.01876772 | 38.05673 |
So these are pretty diffuse priors.
Model 1: estimating the proportion of household contacts with TST induration >=5mm.
#define priors
prior <- c(
prior(normal(0, 2), class = Intercept),
prior(normal(0, 2), class = "b"),
prior(cauchy(0,1), sd)
)
#fit model
fit1 <- brm(tst_5mm ~
#Main exposure
hiv_i +
#Index case characteristics
ageyears_i +
#Household characteristics
site +
#random intercepts for househod
(1 | record_id),
data=agegps,
family = bernoulli,
prior = prior,
seed = 13,
chains = 3,
control=list(adapt_delta=0.99))## Warning: Rows containing NAs were excluded from the model.
## Compiling the C++ model
## recompiling to avoid crashing R session
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -D_REENTRANT -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -include stan/math/prim/mat/fun/Eigen.hpp -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -I/usr/local/include -fPIC -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:16: error: expected ';' after top level declarator
## namespace Eigen {
## ^
## ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
## ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
## Start sampling
##
## SAMPLING FOR MODEL 'f5ff6fdc8040aa30a1d89deb8cfd80b3' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.000471 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 4.71 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 38.9198 seconds (Warm-up)
## Chain 1: 14.8577 seconds (Sampling)
## Chain 1: 53.7775 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'f5ff6fdc8040aa30a1d89deb8cfd80b3' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 0.000207 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 2.07 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 38.3511 seconds (Warm-up)
## Chain 2: 15.3712 seconds (Sampling)
## Chain 2: 53.7223 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'f5ff6fdc8040aa30a1d89deb8cfd80b3' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 0.000274 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 2.74 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 39.066 seconds (Warm-up)
## Chain 3: 15.5277 seconds (Sampling)
## Chain 3: 54.5937 seconds (Total)
## Chain 3:
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Family: bernoulli
## Links: mu = logit
## Formula: tst_5mm ~ hiv_i + ageyears_i + site + (1 | record_id)
## Data: agegps (Number of observations: 2725)
## Samples: 3 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 3000
##
## Group-Level Effects:
## ~record_id (Number of levels: 877)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.65 0.15 1.39 1.97 1.00 753 1576
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -1.99 0.31 -2.61 -1.43 1.00 1851 2111
## hiv_iIndexHIVPve -0.22 0.19 -0.58 0.15 1.00 1797 2454
## ageyears_i -0.02 0.01 -0.04 -0.01 1.00 2074 2260
## siteMangaung 1.12 0.19 0.76 1.52 1.00 2056 2239
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
Hypothesis: do household contacts of HIV-positive index cases have a lower probability of TST positivity?
## b_hiv_iIndexHIVPve
## 0.881
Look at model output
#Look at the posterior odds ratios
f1_graph_data <- posterior_samples(fit1)
f1_graph_data <- f1_graph_data %>%
select(starts_with("b_")) %>%
gather(key, value) %>%
group_by(key) %>%
median_qi(value, .width=0.95) %>%
mutate_at(vars(value, .lower, .upper), exp) %>%
mutate(model = "tst >=5mm")
f1_graph_data %>%
gt() %>%
fmt_number(
columns = vars(value, .lower, .upper),
decimals = 2,
use_seps = FALSE
)| key | value | .lower | .upper | .width | .point | .interval | model |
|---|---|---|---|---|---|---|---|
| b_ageyears_i | 0.98 | 0.96 | 0.99 | 0.95 | median | qi | tst >=5mm |
| b_hiv_iIndexHIVPve | 0.80 | 0.56 | 1.17 | 0.95 | median | qi | tst >=5mm |
| b_Intercept | 0.14 | 0.07 | 0.24 | 0.95 | median | qi | tst >=5mm |
| b_siteMangaung | 3.05 | 2.13 | 4.58 | 0.95 | median | qi | tst >=5mm |
# get the fitted estimates
nd <- agegps %>%
expand(hiv_i, site,
ageyears_i = modelr::seq_range(ageyears_i, n=100)
)
fitted_fit1 <- fitted(fit1, re_formula = NA, newdata = nd)
fitted_graph_data <- cbind(nd, fitted_fit1)
#Graph fitted estimates
f1_graph_a <- fitted_graph_data %>%
ggplot() +
geom_line(aes(y=Estimate, x=ageyears_i, group=1)) +
geom_line(aes(y=Q2.5, x=ageyears_i), group=1, linetype=4) +
geom_line(aes(y=Q97.5, x=ageyears_i), group=1, linetype=4) +
facet_grid(hiv_i~ site) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme_bw(base_family = "Open Sans Condensed Light") +
labs(x="",
y="",
title = paste("A) Percentage (95% credible interval) of household contacts with TST induration", "\u2265","5mm"))
f1_graph_aAlternative graph
fitted_fit1_alt <- fitted(fit1, re_formula = NA, newdata = nd, probs = c(0.025, 0.05, 0.1, 0.25, 0.33, 0.66, 0.75, 0.9, 0.95, 0.975))
fitted_graph_data_alt <- cbind(nd, fitted_fit1_alt)
#Graph fitted estimates
f1_graph_a_alt <- fitted_graph_data_alt %>%
ggplot() +
geom_ribbon(aes(ymin=Q2.5, ymax=Q97.5, x=ageyears_i), group=1, fill="#EDF8E9") +
geom_ribbon(aes(ymin=Q5, ymax=Q95, x=ageyears_i), group=1, fill="#BAE4B3") +
geom_ribbon(aes(ymin=Q10, ymax=Q90, x=ageyears_i), group=1, fill="#74C476") +
geom_ribbon(aes(ymin=Q25, ymax=Q75, x=ageyears_i), group=1, fill="#31A354") +
geom_ribbon(aes(ymin=Q33, ymax=Q66, x=ageyears_i), group=1, fill="#006D2C") +
geom_line(aes(y=Estimate, x=ageyears_i, group=1)) +
facet_grid(hiv_i~ site) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme_bw(base_family = "Open Sans Condensed Light") +
labs(x="",
y="",
title = paste("A) Mean percentage (uncertainty intervals) of household contacts with TST induration", "\u2265","5mm"))
f1_graph_a_alt#define priors
prior <- c(
prior(normal(0, 2), class = Intercept),
prior(normal(0, 2), class = "b"),
prior(cauchy(0,1), sd)
)
#fit model
fit2 <- brm(tst_10mm ~
#Main exposure
hiv_i +
#Index case characteristics
ageyears_i +
#Household characteristics
site +
#random intercepts for househod
(1 | record_id),
data=agegps,
family = bernoulli,
prior = prior,
seed = 13,
chains = 3,
control=list(adapt_delta=0.99))## Warning: Rows containing NAs were excluded from the model.
## Compiling the C++ model
## recompiling to avoid crashing R session
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -D_REENTRANT -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -include stan/math/prim/mat/fun/Eigen.hpp -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -I/usr/local/include -fPIC -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:16: error: expected ';' after top level declarator
## namespace Eigen {
## ^
## ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
## ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
## Start sampling
##
## SAMPLING FOR MODEL 'f5ff6fdc8040aa30a1d89deb8cfd80b3' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.000496 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 4.96 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 40.0984 seconds (Warm-up)
## Chain 1: 15.3312 seconds (Sampling)
## Chain 1: 55.4296 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'f5ff6fdc8040aa30a1d89deb8cfd80b3' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 0.000201 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 2.01 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 38.636 seconds (Warm-up)
## Chain 2: 15.2067 seconds (Sampling)
## Chain 2: 53.8427 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'f5ff6fdc8040aa30a1d89deb8cfd80b3' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 0.000209 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 2.09 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 38.0898 seconds (Warm-up)
## Chain 3: 15.9118 seconds (Sampling)
## Chain 3: 54.0016 seconds (Total)
## Chain 3:
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Family: bernoulli
## Links: mu = logit
## Formula: tst_10mm ~ hiv_i + ageyears_i + site + (1 | record_id)
## Data: agegps (Number of observations: 2725)
## Samples: 3 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 3000
##
## Group-Level Effects:
## ~record_id (Number of levels: 877)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.64 0.16 1.34 1.95 1.00 921 1309
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -2.73 0.32 -3.38 -2.13 1.00 2050 2103
## hiv_iIndexHIVPve -0.25 0.19 -0.62 0.14 1.00 2222 2056
## ageyears_i -0.02 0.01 -0.03 -0.01 1.00 2450 2448
## siteMangaung 1.51 0.22 1.11 1.95 1.00 2070 2304
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
Model output
#Look at the posterior odds ratios
f2_graph_data <- posterior_samples(fit2)
f2_graph_data <- f2_graph_data %>%
select(starts_with("b_")) %>%
gather(key, value) %>%
group_by(key) %>%
median_qi(value, .width=0.95) %>%
mutate_at(vars(value, .lower, .upper), exp) %>%
mutate(model = "tst >=10mm")
f2_graph_data %>%
gt() %>%
fmt_number(
columns = vars(value, .lower, .upper),
decimals = 2,
use_seps = FALSE
)| key | value | .lower | .upper | .width | .point | .interval | model |
|---|---|---|---|---|---|---|---|
| b_ageyears_i | 0.98 | 0.97 | 0.99 | 0.95 | median | qi | tst >=10mm |
| b_hiv_iIndexHIVPve | 0.78 | 0.54 | 1.15 | 0.95 | median | qi | tst >=10mm |
| b_Intercept | 0.07 | 0.03 | 0.12 | 0.95 | median | qi | tst >=10mm |
| b_siteMangaung | 4.49 | 3.04 | 7.02 | 0.95 | median | qi | tst >=10mm |
# get the fitted estimates
nd <- agegps %>%
expand(hiv_i, site,
ageyears_i = modelr::seq_range(ageyears_i, n=100)
)
fitted_fit2 <- fitted(fit2, re_formula = NA, newdata = nd)
fitted_graph_data2 <- cbind(nd, fitted_fit2)
#Graph fitted estimates
f2_graph_a <- fitted_graph_data2 %>%
ggplot() +
geom_line(aes(y=Estimate, x=ageyears_i, group=1)) +
geom_line(aes(y=Q2.5, x=ageyears_i), group=1, linetype=4) +
geom_line(aes(y=Q97.5, x=ageyears_i), group=1, linetype=4) +
facet_grid(hiv_i~ site) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme_bw(base_family = "Open Sans Condensed Light") +
labs(x="Index case age (years)",
y="",
title = paste("B) Percentage (95% credible interval) of household contacts with TST induration", "\u2265","10mm"))
f2_graph_aHypothesis: do household contacts of HIV-positive index cases have a lower probability of TST positivity?
## b_hiv_iIndexHIVPve
## 0.9013333
fitted_fit2_alt <- fitted(fit2, re_formula = NA, newdata = nd, probs = c(0.025, 0.05, 0.1, 0.25, 0.33, 0.66, 0.75, 0.9, 0.95, 0.975))
fitted_graph_data_alt2 <- cbind(nd, fitted_fit2_alt)
#Graph fitted estimates
f2_graph_a_alt2 <- fitted_graph_data_alt2 %>%
ggplot() +
geom_ribbon(aes(ymin=Q2.5, ymax=Q97.5, x=ageyears_i), group=1, fill="#EFF3FF") +
geom_ribbon(aes(ymin=Q5, ymax=Q95, x=ageyears_i), group=1, fill="#BDD7E7") +
geom_ribbon(aes(ymin=Q10, ymax=Q90, x=ageyears_i), group=1, fill="#6BAED6") +
geom_ribbon(aes(ymin=Q25, ymax=Q75, x=ageyears_i), group=1, fill="#3182BD") +
geom_ribbon(aes(ymin=Q33, ymax=Q66, x=ageyears_i), group=1, fill="#08519C") +
geom_line(aes(y=Estimate, x=ageyears_i, group=1)) +
facet_grid(hiv_i~ site) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme_bw(base_family = "Open Sans Condensed Light") +
labs(x="Index case age (years)",
y="",
title = paste("B) Mean percentage (uncertainty intervals) of household contacts with TST induration", "\u2265","10mm"))
f2_graph_a_alt2This reproduction of the analysis was run by:
| keyName | value |
|---|---|
| sysname | Darwin |
| release | 19.3.0 |
| version | Darwin Kernel Version 19.3.0: Thu Jan 9 20:58:23 PST 2020; root:xnu-6153.81.5~1/RELEASE_X86_64 |
| nodename | petermacphersons-MacBook-Pro.local |
| machine | x86_64 |
| login | root |
| user | peter.macpherson |
| effective_user | peter.macpherson |
Analysis was run at 2020-02-26 10:19:43, and using the following Session Info:
R version 3.6.2 (2019-12-12)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Catalina 10.15.3
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] here_0.1 patchwork_1.0.0 cowplot_1.0.0
[4] janitor_1.2.1 gt_0.1.0 Hmisc_4.3-1
[7] Formula_1.2-3 survival_3.1-8 lattice_0.20-38
[10] tidybayes_2.0.1 brms_2.11.1 Rcpp_1.0.3
[13] arsenal_3.4.0 knitr_1.28 pmthemes_0.0.0.9000
[16] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.4
[19] purrr_0.3.3 readr_1.3.1 tidyr_1.0.2
[22] tibble_2.1.3 ggplot2_3.2.1 tidyverse_1.3.0
loaded via a namespace (and not attached):
[1] readxl_1.3.1 backports_1.1.5 plyr_1.8.5
[4] igraph_1.2.4.2 lazyeval_0.2.2 splines_3.6.2
[7] svUnit_0.7-12 crosstalk_1.0.0 rstantools_2.0.0
[10] inline_0.3.15 digest_0.6.24 htmltools_0.4.0
[13] rsconnect_0.8.16 fansi_0.4.1 magrittr_1.5
[16] checkmate_2.0.0 cluster_2.1.0 modelr_0.1.5
[19] matrixStats_0.55.0 xts_0.12-0 prettyunits_1.1.1
[22] jpeg_0.1-8.1 colorspace_1.4-1 rvest_0.3.5
[25] haven_2.2.0 xfun_0.12 callr_3.4.2
[28] crayon_1.3.4 jsonlite_1.6.1 zoo_1.8-7
[31] glue_1.3.1.9000 gtable_0.3.0 pkgbuild_1.0.6
[34] rstan_2.19.3 abind_1.4-5 scales_1.1.0
[37] mvtnorm_1.0-12 DBI_1.1.0 miniUI_0.1.1.1
[40] xtable_1.8-4 htmlTable_1.13.3 foreign_0.8-72
[43] stats4_3.6.2 StanHeaders_2.21.0-1 DT_0.12
[46] htmlwidgets_1.5.1 httr_1.4.1 threejs_0.3.3
[49] arrayhelpers_1.1-0 RColorBrewer_1.1-2 ellipsis_0.3.0
[52] acepack_1.4.1 farver_2.0.3 pkgconfig_2.0.3
[55] loo_2.2.0 nnet_7.3-12 sass_0.1.2.1
[58] dbplyr_1.4.2 utf8_1.1.4 labeling_0.3
[61] tidyselect_1.0.0 rlang_0.4.4 reshape2_1.4.3
[64] later_1.0.0 munsell_0.5.0 cellranger_1.1.0
[67] tools_3.6.2 cli_2.0.1 generics_0.0.2
[70] broom_0.5.4 ggridges_0.5.2 evaluate_0.14
[73] fastmap_1.0.1 yaml_2.2.1 processx_3.4.2
[76] fs_1.3.1 nlme_3.1-142 mime_0.9
[79] xml2_1.2.2 compiler_3.6.2 bayesplot_1.7.1
[82] shinythemes_1.1.2 rstudioapi_0.11 png_0.1-7
[85] reprex_0.3.0 stringi_1.4.6 highr_0.8
[88] ps_1.3.2 Brobdingnag_1.2-6 Matrix_1.2-18
[91] commonmark_1.7 markdown_1.1 shinyjs_1.1
[94] vctrs_0.2.2 pillar_1.4.3 lifecycle_0.1.0
[97] bridgesampling_0.8-1 data.table_1.12.8 httpuv_1.5.2
[100] R6_2.4.1 latticeExtra_0.6-29 promises_1.1.0
[103] gridExtra_2.3 codetools_0.2-16 colourpicker_1.0
[106] gtools_3.8.1 assertthat_0.2.1 rprojroot_1.3-2
[109] withr_2.1.2 shinystan_2.5.0 parallel_3.6.2
[112] hms_0.5.3 grid_3.6.2 rpart_4.1-15
[115] coda_0.19-3 snakecase_0.11.0 rmarkdown_2.1
[118] shiny_1.4.0 lubridate_1.7.4 base64enc_0.1-3
[121] dygraphs_1.1.1.6